{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e2b8fbb7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import jieba\n",
    "# https://github.com/fxsjy/jieba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9e90c26c",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Loading model from cache C:\\Users\\COLORFUL\\AppData\\Local\\Temp\\jieba.cache\n",
      "Loading model cost 0.412 seconds.\n",
      "Prefix dict has been built successfully.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['麦当劳', '更名', '为金', '拱门']"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s=jieba.lcut('麦当劳更名为金拱门',cut_all=False)\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e63cf45",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<generator object Tokenizer.cut at 0x00000232AB1F2820>"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 全模式分词\n",
    "s1=jieba.cut('麦当劳更名为金拱门',cut_all=False)\n",
    "s1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0839f72f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['我',\n",
       " '来到',\n",
       " '北京',\n",
       " '清华',\n",
       " '华大',\n",
       " '大学',\n",
       " '清华大学',\n",
       " '参观',\n",
       " '了',\n",
       " '美丽',\n",
       " '的',\n",
       " '校园',\n",
       " '，',\n",
       " '并且',\n",
       " '聆听',\n",
       " '了',\n",
       " '知名',\n",
       " '教授',\n",
       " '的',\n",
       " '精彩',\n",
       " '讲座',\n",
       " '。']"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s2=jieba.lcut_for_search('我来到北京清华大学参观了美丽的校园，并且聆听了知名教授的精彩讲座。')\n",
    "s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fc503107",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['他', '来到', '了', '网易', '杭研', '大厦']"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    }
   ],
   "source": [
    "s2=jieba.lcut('我来到北京清华大学参观了美丽的校园，并且聆听了知名教授的精彩讲座。')\n",
    "s2=jieba.lcut('他来到了网易杭研大厦')\n",
    "s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c56e9300",
   "metadata": {},
   "outputs": [],
   "source": [
    "import jieba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "401204c3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[pair('我', 'r'), pair('爱', 'v'), pair('北京', 'ns'), pair('天安门', 'ns')]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    }
   ],
   "source": [
    "import jieba.posseg\n",
    "\n",
    "\n",
    "jieba.posseg.lcut('我爱北京天安门')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33618a41",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "torch11.3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
